import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
df = pd.read_csv('cmpinf_2120_binary_classification.csv')
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 300 entries, 0 to 299 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 x1 300 non-null float64 1 x2 300 non-null float64 2 x3 300 non-null object 3 y 300 non-null int64 dtypes: float64(2), int64(1), object(1) memory usage: 9.5+ KB
df.nunique()
x1 300 x2 300 x3 3 y 2 dtype: int64
sns.catplot(data = df, x='x3', kind='count')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = df, x='y', kind='count')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
df.y.value_counts(normalize=True)
y 0 0.553333 1 0.446667 Name: proportion, dtype: float64
sns.displot(data = df, x='x1', kind='hist')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.displot(data = df, x='x2', kind='hist')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = df, kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
We saw how to VISUALIZE the output to input relationship with LOGISTIC TREND in Seaborn.
But BEFORE examining the TREND, we SHOULD examine the CONDITIONAL DISTRIBUTIONS of the INPUTS GIVEN the categorical input and binary outcome!
sns.catplot(data = df, x='x3', y='x1', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = df, x='x3', y='x2', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = df, x='y', y='x1', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = df, x='y', y='x2', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
But reshaping can allow us to explore MULTIPLE inputs at the same time!
# Long-format reshape: keep rowid/y/x3 as identifiers, stack x1 and x2
# into a single 'value' column tagged by 'variable'.
lf = (
    df
    .reset_index()
    .rename(columns={'index': 'rowid'})
    .melt(id_vars=['rowid', 'y', 'x3'])
)
lf
| rowid | y | x3 | variable | value | |
|---|---|---|---|---|---|
| 0 | 0 | 1 | A | x1 | -0.173789 |
| 1 | 1 | 1 | A | x1 | -0.388312 |
| 2 | 2 | 1 | A | x1 | -0.730844 |
| 3 | 3 | 0 | A | x1 | -1.336667 |
| 4 | 4 | 0 | A | x1 | -0.443531 |
| ... | ... | ... | ... | ... | ... |
| 595 | 295 | 0 | C | x2 | 0.511348 |
| 596 | 296 | 1 | C | x2 | 1.024983 |
| 597 | 297 | 1 | C | x2 | -0.003897 |
| 598 | 298 | 1 | C | x2 | 1.620378 |
| 599 | 299 | 1 | C | x2 | -0.813426 |
600 rows × 5 columns
sns.catplot(data = lf, x='y', y='value', col='variable', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = lf, x='x3', y='value', col='variable', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = lf, x='y', y='value', col='variable', hue='x3', kind='box')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.catplot(data = lf, x='y', y='value', col='variable', hue='x3', kind='point', join=False, dodge=True)
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.lmplot(data = lf, x='value', y='y', col='variable', logistic=True)
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.lmplot(data = lf, x='value', y='y', col='variable', hue='x3', logistic=True)
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
However, we are still not done exploring! We also need to examine the continuous input to continuous input relationship!
sns.relplot(data = df, x='x1', y='x2')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.relplot(data = df, x='x1', y='x2', hue='y')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
But facet by the CATEGORICAL INPUT!!
sns.relplot(data = df, x='x1', y='x2', hue='y', col='x3')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
This figure suggests we need an INTERACTION between the categorical input and the continuous inputs!!!
This figure also suggests STRAIGHT LINES will NOT be sufficient!!!
Start with the SIMPLEST and work our way up to COMPLEX models!!!
fit_0 = smf.logit(formula='y ~ 1', data=df).fit()
Optimization terminated successfully.
Current function value: 0.687447
Iterations 4
fit_0.params
Intercept -0.214148 dtype: float64
np.exp( -0.214 ) / ( 1 + np.exp( -0.214) )
0.4467032431118971
df.y.mean()
0.44666666666666666
Next, only consider the CATEGORICAL INPUT (x3).
fit_1 = smf.logit(formula='y ~ x3', data=df).fit()
Optimization terminated successfully.
Current function value: 0.686727
Iterations 4
fit_1.params
Intercept -0.322773 x3[T.B] 0.162431 x3[T.C] 0.162431 dtype: float64
fit_1.pvalues
Intercept 0.111143 x3[T.B] 0.568922 x3[T.C] 0.568922 dtype: float64
def my_coefplot(mod, figsize_use=(10, 4)):
    """Display a coefficient plot for a fitted statsmodels model.

    Each estimated coefficient is drawn as a point with a horizontal
    error bar spanning +/- 2 standard errors (an approximate 95%
    confidence interval). A dashed vertical line at zero makes it easy
    to spot coefficients whose intervals include zero.

    mod -- fitted statsmodels results object (provides .params and .bse)
    figsize_use -- figure size tuple passed to plt.subplots
    """
    fig, ax = plt.subplots(figsize=figsize_use)
    # one marker per coefficient; x-position is the estimate,
    # xerr extends two standard errors to either side
    ax.errorbar(
        y=mod.params.index,
        x=mod.params,
        xerr=2 * mod.bse,
        fmt='o', color='k', ecolor='k', elinewidth=2, ms=10,
    )
    # zero reference line: intervals crossing it suggest non-significance
    ax.axvline(x=0, linestyle='--', color='grey', linewidth=3.5)
    ax.set_xlabel('coefficient value')
    plt.show()
my_coefplot( fit_1 )
Next, linear additive features for the continuous inputs!
fit_2 = smf.logit(formula='y ~ x1 + x2',data=df).fit()
Optimization terminated successfully.
Current function value: 0.681597
Iterations 4
my_coefplot(fit_2)
Next, linear additive features using ALL inputs! The MIXED input model.
fit_3 = smf.logit(formula='y ~ x1 + x2 + x3', data=df).fit()
Optimization terminated successfully.
Current function value: 0.680710
Iterations 4
my_coefplot(fit_3)
We can include MORE FEATURES derived from the inputs!!!!!
Begin with the interaction between the categorical and continuous inputs!
fit_4 = smf.logit(formula='y ~ x3 * (x1 + x2)',data=df).fit()
Optimization terminated successfully.
Current function value: 0.673145
Iterations 5
my_coefplot(fit_4)
Interact the continuous inputs but ADD the categorical input.
fit_5 = smf.logit(formula='y ~ x3 + (x1 + x2)**2',data=df).fit()
Optimization terminated successfully.
Current function value: 0.680128
Iterations 4
my_coefplot(fit_5)
Interact the categorical with the MAIN EFFECTS and the INTERACTIONS from the continuous inputs.
fit_6 = smf.logit(formula='y ~ x3 * ( (x1 + x2)**2 )',data=df).fit()
Optimization terminated successfully.
Current function value: 0.633343
Iterations 6
my_coefplot(fit_6)
Circular regions between 2 continuous inputs correspond to including QUADRATIC features!
fit_7 = smf.logit(formula='y ~ x3 + x1 + np.power(x1, 2) + x2 + np.power(x2, 2)', data=df).fit()
Optimization terminated successfully.
Current function value: 0.568805
Iterations 6
my_coefplot(fit_7)
Interact the linear continuous inputs while still using the quadratic features.
fit_8 = smf.logit(formula='y ~ x3 + x1 * x2 + np.power(x1, 2) + np.power(x2, 2)',data=df).fit()
Optimization terminated successfully.
Current function value: 0.568557
Iterations 6
my_coefplot(fit_8)
Interact the categorical with ALL features derived from the continuous inputs!
fit_9 = smf.logit(formula='y ~ x3 * ( x1 * x2 + np.power(x1, 2) + np.power(x2, 2) )',data=df).fit()
Optimization terminated successfully.
Current function value: 0.340410
Iterations 9
my_coefplot(fit_9)
fit_9.pvalues < 0.05
Intercept True x3[T.B] False x3[T.C] True x1 False x3[T.B]:x1 False x3[T.C]:x1 False x2 False x3[T.B]:x2 False x3[T.C]:x2 False x1:x2 False x3[T.B]:x1:x2 False x3[T.C]:x1:x2 True np.power(x1, 2) True x3[T.B]:np.power(x1, 2) False x3[T.C]:np.power(x1, 2) False np.power(x2, 2) True x3[T.B]:np.power(x2, 2) False x3[T.C]:np.power(x2, 2) True dtype: bool
Let's add in MORE polynomials derived from the continuous inputs!
fit_10 = smf.logit(formula='y ~ x3 * ( x1 * x2 + np.power(x1, 2) + np.power(x2, 2) + np.power(x1, 3) + np.power(x2, 3) )', data=df).fit()
Optimization terminated successfully.
Current function value: 0.325105
Iterations 10
my_coefplot( fit_10 )
We can interact the linear and quadratic features between the 2 continuous inputs while adding the cubic features.
fit_11 = smf.logit(formula='y ~ x3 * ( (x1 + np.power(x1, 2)) * (x2 + np.power(x2, 2)) + np.power(x1, 3) + np.power(x2, 3) )', data=df).fit()
Optimization terminated successfully.
Current function value: 0.312227
Iterations 12
my_coefplot( fit_11 )
fit_11.params.size
33
We have yet to discuss in this course how to decide which model is best...
For now, let's examine model behavior through predictions!!!!
Define a visualization grid that has many combinations between the 2 continuous inputs and all unique values of the categorical input.
# Evenly spaced values spanning the observed range of each continuous input.
x1_values = np.linspace(df.x1.min(), df.x1.max(), num=101)
x2_values = np.linspace(df.x2.min(), df.x2.max(), num=101)
# Full factorial grid: every (x1, x2) pair crossed with every level of x3,
# ordered with x1 varying slowest and x3 fastest (as in the original).
input_grid = pd.DataFrame(
    [(a, b, c) for a in x1_values for b in x2_values for c in df.x3.unique()],
    columns=['x1', 'x2', 'x3'])
input_grid.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 30603 entries, 0 to 30602 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 x1 30603 non-null float64 1 x2 30603 non-null float64 2 x3 30603 non-null object dtypes: float64(2), object(1) memory usage: 717.4+ KB
input_grid.nunique()
x1 101 x2 101 x3 3 dtype: int64
sns.relplot(data = input_grid, x='x1', y='x2', col='x3', s=7)
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
Let's define a function that makes the predictions and generates the figure that we want.
def viz_predict_prob_surface(a_mod, input_df):
    """Visualize a model's predicted probability surface over an input grid.

    a_mod -- fitted statsmodels results object (provides .predict)
    input_df -- DataFrame with columns x1, x2, x3 covering the input space
    """
    # work on a copy so the caller's grid DataFrame is never modified
    pred_df = input_df.copy()
    # predicted probability of the event (y = 1) at every grid point
    pred_df['pred_prob'] = a_mod.predict(input_df)
    # scatter the grid, one panel per x3 level, colored by probability
    sns.relplot(
        data=pred_df, x='x1', y='x2', col='x3',
        hue='pred_prob', palette='coolwarm',
    )
    plt.show()
Visualize the predictions for ALL models!
viz_predict_prob_surface(fit_0, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
viz_predict_prob_surface(fit_1, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_1.params
Intercept -0.322773 x3[T.B] 0.162431 x3[T.C] 0.162431 dtype: float64
viz_predict_prob_surface(fit_2, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_2.params
Intercept -0.216981 x1 -0.101307 x2 0.205841 dtype: float64
viz_predict_prob_surface(fit_3, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_3.params
Intercept -0.331145 x3[T.B] 0.206531 x3[T.C] 0.134536 x1 -0.105616 x2 0.209683 dtype: float64
viz_predict_prob_surface(fit_4, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_4.params
Intercept -0.331918 x3[T.B] 0.257901 x3[T.C] 0.174916 x1 -0.138166 x3[T.B]:x1 -0.085401 x3[T.C]:x1 0.103721 x2 0.122932 x3[T.B]:x2 0.477475 x3[T.C]:x2 -0.154773 dtype: float64
viz_predict_prob_surface(fit_5, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_5.params
Intercept -0.327547 x3[T.B] 0.209127 x3[T.C] 0.123909 x1 -0.103413 x2 0.211844 x1:x2 -0.070494 dtype: float64
viz_predict_prob_surface(fit_6, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_6.params
Intercept -0.330708 x3[T.B] 0.180200 x3[T.C] 0.087784 x1 -0.135538 x3[T.B]:x1 -0.047467 x3[T.C]:x1 0.067358 x2 0.120629 x3[T.B]:x2 0.372966 x3[T.C]:x2 -0.369111 x1:x2 -0.019329 x3[T.B]:x1:x2 0.663544 x3[T.C]:x1:x2 -1.038822 dtype: float64
fit_6.pvalues < 0.05
Intercept False x3[T.B] False x3[T.C] False x1 False x3[T.B]:x1 False x3[T.C]:x1 False x2 False x3[T.B]:x2 False x3[T.C]:x2 False x1:x2 False x3[T.B]:x1:x2 False x3[T.C]:x1:x2 True dtype: bool
CLASSIFICATIONS or DECISIONS are MADE BY COMPARING the PREDICTED PROBABILITY to a THRESHOLD.
If the PREDICTED PROBABILITY is GREATER THAN the threshold we CLASSIFY the event!!!
def viz_predict_class_surface(a_mod, input_df, threshold=0.5):
    """Visualize the classification regions implied by a fitted model.

    Predicts the event probability on ``input_df``, classifies each grid
    point by comparing that probability to ``threshold``, and shows the
    resulting class surface as a scatter plot faceted by the categorical
    input x3.

    Parameters
    ----------
    a_mod : fitted statsmodels results object (provides .predict)
    input_df : pandas.DataFrame with columns x1, x2, x3
    threshold : float, default 0.5
        Probability cutoff. Points with pred_prob > threshold are
        classified as the event (1), otherwise the non-event (0).
        The default reproduces the conventional 0.5 decision rule.
    """
    # create a copy of the input dataframe so the caller's grid is untouched
    viz_df = input_df.copy()
    # make prediction and assign to a column pred_prob
    viz_df['pred_prob'] = a_mod.predict(input_df)
    # classify by comparing the predicted probability to the threshold
    viz_df['pred_class'] = np.where(viz_df.pred_prob > threshold, 1, 0)
    # categorical dtype yields a discrete (not continuous) color legend
    viz_df['pred_class'] = viz_df.pred_class.astype('category')
    # visualize the surface as a scatter plot, one facet per x3 level
    sns.relplot(data=viz_df, x='x1', y='x2', col='x3', hue='pred_class')
    plt.show()
viz_predict_class_surface( fit_6, input_grid )
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
viz_predict_prob_surface(fit_7, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_7.params
Intercept 0.875627 x3[T.B] 0.173488 x3[T.C] 0.194616 x1 -0.001487 np.power(x1, 2) -0.667651 x2 0.275804 np.power(x2, 2) -0.797702 dtype: float64
viz_predict_class_surface(fit_7, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
viz_predict_prob_surface(fit_8, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
viz_predict_prob_surface(fit_9, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
fit_9.params
Intercept 3.978235 x3[T.B] -1.919492 x3[T.C] -2.275757 x1 -0.314989 x3[T.B]:x1 0.184381 x3[T.C]:x1 0.656449 x2 0.318067 x3[T.B]:x2 0.496314 x3[T.C]:x2 -0.624434 x1:x2 0.446267 x3[T.B]:x1:x2 3.051145 x3[T.C]:x1:x2 -4.479596 np.power(x1, 2) -3.328794 x3[T.B]:np.power(x1, 2) 1.537787 x3[T.C]:np.power(x1, 2) 1.170873 np.power(x2, 2) -4.301378 x3[T.B]:np.power(x2, 2) 2.080366 x3[T.C]:np.power(x2, 2) 2.600413 dtype: float64
Other types of binary classifiers are the LDA and QDA models. We are not using those here because they are essentially logistic regression models with linear features (LDA) and logistic regression models with quadratic features and interactions between the continuous inputs (QDA).
viz_predict_class_surface(fit_9, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
viz_predict_prob_surface(fit_10, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
viz_predict_prob_surface(fit_11, input_grid)
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
We fit 12 models!
models_list = [fit_0, fit_1, fit_2, fit_3, fit_4, fit_5, fit_6, fit_7, fit_8, fit_9, fit_10, fit_11]
len(models_list)
12
models_list[0].params
Intercept -0.214148 dtype: float64
models_list[0].params.size
1
[mod.params.size for mod in models_list]
[1, 3, 3, 5, 9, 6, 12, 7, 8, 18, 24, 33]
The penalty is larger for models with more unknowns to estimate!
[mod.aic for mod in models_list]
[414.46847267250416, 418.0359034217905, 414.95804859181453, 418.42577803602103, 421.88674429345497, 420.0769547811328, 404.00563481351185, 355.2832312360324, 357.1342358032212, 240.24610642262985, 243.06323706023355, 253.33636245031911]
[mod.bic for mod in models_list]
[418.17225514716034, 429.1472508457591, 426.0693960157831, 436.94469040930204, 455.22078656536075, 442.29964962907, 448.45102450938623, 381.20970855862583, 386.7644956004708, 306.9141909664415, 331.9540164519824, 375.56118411397375]
[mod.llf for mod in models_list]
[-206.23423633625208, -206.01795171089526, -204.47902429590727, -204.21288901801051, -201.94337214672748, -204.0384773905664, -190.00281740675592, -170.6416156180162, -170.5671179016106, -102.12305321131493, -97.53161853011677, -93.66818122515956]
Compile the results in a DataFrame.
# One row per fitted model: size and the three fit/penalty summaries.
model_results_df = pd.DataFrame({
    'model_name': list(range(len(models_list))),
    'num_coefs': [a_mod.params.size for a_mod in models_list],
    'LogLik': [a_mod.llf for a_mod in models_list],
    'AIC': [a_mod.aic for a_mod in models_list],
    'BIC': [a_mod.bic for a_mod in models_list],
})
model_results_df
| model_name | num_coefs | LogLik | AIC | BIC | |
|---|---|---|---|---|---|
| 0 | 0 | 1 | -206.234236 | 414.468473 | 418.172255 |
| 1 | 1 | 3 | -206.017952 | 418.035903 | 429.147251 |
| 2 | 2 | 3 | -204.479024 | 414.958049 | 426.069396 |
| 3 | 3 | 5 | -204.212889 | 418.425778 | 436.944690 |
| 4 | 4 | 9 | -201.943372 | 421.886744 | 455.220787 |
| 5 | 5 | 6 | -204.038477 | 420.076955 | 442.299650 |
| 6 | 6 | 12 | -190.002817 | 404.005635 | 448.451025 |
| 7 | 7 | 7 | -170.641616 | 355.283231 | 381.209709 |
| 8 | 8 | 8 | -170.567118 | 357.134236 | 386.764496 |
| 9 | 9 | 18 | -102.123053 | 240.246106 | 306.914191 |
| 10 | 10 | 24 | -97.531619 | 243.063237 | 331.954016 |
| 11 | 11 | 33 | -93.668181 | 253.336362 | 375.561184 |
sns.relplot(data = model_results_df, x='model_name', y='LogLik')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.relplot(data = model_results_df, x='model_name', y='AIC')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
sns.relplot(data = model_results_df, x='model_name', y='BIC')
plt.show()
C:\Users\jyurk\anaconda3\envs\cmpinf2120\lib\site-packages\seaborn\axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
AIC and BIC agree, model 9 is considered the BEST!!!!!!